In [1]:
import requests
import csv
In [2]:
# Column identifiers sent to the RCSB PDB custom report service.
# Available fields: http://www.rcsb.org/pdb/results/reportField.do
# Custom Report Web Services general info: http://www.rcsb.org/pdb/software/wsreport.do
se = "ndbId"  # NOTE(review): not referenced by any other visible cell

# Columns requested by the main pull, sent as one comma-separated string.
string_names = ",".join([
    "classification",
    "experimentalTechnique",
    "macromoleculeType",
    "residueCount",
    "resolution",
    "structureMolecularWeight",
    "crystallizationMethod",
    "crystallizationTempK",
    "densityMatthews",
    "densityPercentSol",
    "pdbxDetails",
    "phValue",
    "publicationYear",
])

# Columns requested by the sequence pull.
sequences_string_names = ",".join([
    "sequence",
    "residueCount",
    "macromoleculeType",
])
In [3]:
# Main pull: request the custom report as CSV, limited to the columns listed
# in string_names. 'pdbids': '*' presumably selects every entry -- confirm
# against the service documentation.
payload = {
    'pdbids': '*',
    'service': 'wsfile',
    'format': 'csv',
    'primaryOnly': '1',
    'CustomReportColumns': string_names,
}
r = requests.get('http://www.rcsb.org/pdb/rest/customReport', params=payload)
# Fail here on a 4xx/5xx status so an error page is never parsed and saved
# as if it were report data by the cells below.
r.raise_for_status()
In [4]:
# Display the URL of the main-pull response, including the encoded query string.
r.url
Out[4]:
In [5]:
# Display the first line of the response body (presumably the CSV header row).
r.text.splitlines()[0]
Out[5]:
In [6]:
# Display the requested column names as a list, one entry per column.
string_names.split(",")
Out[6]:
In [7]:
# Write the main pull to disk by parsing the response body as CSV and
# re-serialising every row.
output_reader = csv.reader(r.text.splitlines())
# newline='' lets the csv writer control line endings itself, as the csv module
# documentation requires; without it extra blank rows appear on Windows.
# Explicit UTF-8 keeps the write from depending on the platform default
# encoding when a field contains non-ASCII text.
with open('pdb_data_no_dups.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(output_reader)
In [8]:
# Count the lines in the main-pull response body.
len(r.text.splitlines())
Out[8]:
In [9]:
# Sequence pull: same request as the main pull, but limited to the columns
# listed in sequences_string_names.
payload_seq = {
    'pdbids': '*',
    'service': 'wsfile',
    'format': 'csv',
    'primaryOnly': '1',
    'CustomReportColumns': sequences_string_names,
}
r_seq = requests.get('http://www.rcsb.org/pdb/rest/customReport', params=payload_seq)
# Fail here on a 4xx/5xx status so an error page is never parsed and saved
# as if it were sequence data.
r_seq.raise_for_status()
In [10]:
# Write the sequence pull to disk by parsing the response body as CSV and
# re-serialising every row.
output_reader_seq = csv.reader(r_seq.text.splitlines())
# newline='' lets the csv writer control line endings itself, as the csv module
# documentation requires; without it extra blank rows appear on Windows.
# Explicit UTF-8 keeps the write from depending on the platform default
# encoding when a field contains non-ASCII text.
with open('pdb_data_seq.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer_seq = csv.writer(csvfile)
    csv_writer_seq.writerows(output_reader_seq)
# Report names for the "all reports" pull, sent as one comma-separated string.
# Built with join so a comma always separates adjacent names; hand-written
# concatenation had fused "RefinementDetails" with "refinementParameters" and
# "NmrRepresentative" with "NMRRefinement".
reports = ",".join([
    "StructureSummary",
    "Sequence",
    "Ligands",
    "BindingAffinity",
    "BiologicalDetails",
    "ClusterEntity",
    "Domains",
    "Crystallization",
    "UnitCellDimensions",
    "DataCollectionDetails",
    "RefinementDetails",
    "refinementParameters",
    "NmrSoftware",
    "NmrSpectrometer",
    "NMRExperimentalSampleConditions",
    "NmrRepresentative",
    "NMRRefinement",
    "NmrEnsemble",
    "EMStructure",
    "Citation",
    "OtherCitations",
    "SGProject",
])
# "All reports" pull: request the reports named in `reports` as CSV and save
# the response to pdb_data_all.csv.
# NOTE(review): whether the service accepts several comma-separated names in
# 'reportName' is not shown here -- confirm against the service documentation.
payload_all = {
    'pdbids': '*',
    'service': 'wsfile',
    'format': 'csv',
    'primaryOnly': '1',
    'reportName': reports,
}
r_all = requests.get('http://www.rcsb.org/pdb/rest/customReport', params=payload_all)
# Fail here on a 4xx/5xx status so an error page is never saved as data.
r_all.raise_for_status()

output_reader_all = csv.reader(r_all.text.splitlines())
# Text mode with newline='' is what csv.writer needs on Python 3; binary mode
# ('wb') makes writerow raise TypeError because the writer emits str.
with open('pdb_data_all.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    # Iterate this pull's own reader, not output_reader from the main pull,
    # which is a different (and already consumed) iterator.
    csv_writer.writerows(output_reader_all)

# Display the URL of the response, including the encoded query string.
r_all.url
In [ ]: